# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib

# Load the tips data set. The first line of the file is skipped (presumably the
# original header row) and the columns are given Chinese names instead.
df = pd.read_csv(
    r'./tips.csv',
    header=None,
    skiprows=1,
    names=['总金额', '小费金额', '性别', '吸烟', '日期', '时间', '尺寸'],
)
print(df.head(10))

# Configure a Chinese-capable font (SimHei) so the Chinese labels can render.
matplotlib.rcParams.update({
    'font.sans-serif': ['SimHei'],
    'font.family': ['sans-serif'],
})

# Relationship between the tip amount and the total bill.
# DataFrame.plot opens its own figure, so no explicit axes are passed here.
df.plot.scatter(x='总金额', y='小费金额', color='pink', label='小费于总金额的关系')
# Summary statistics; the result must be printed to be visible when this file
# runs as a script (a bare expression is silently discarded).
print(df.describe())

# Mean tip per value of the '性别' (gender) column.
# NOTE: despite its name, `fig` is a pandas Series, not a matplotlib Figure.
fig = df.groupby('性别')['小费金额'].mean()
print(fig)
# Series.plot draws on the *current* axes when a figure already exists, which
# would paint the bars over the scatter plot above; give the chart fresh axes.
fig.plot.bar(ax=plt.subplots()[1])

# Mean tip per value of the '日期' (date) column, again on its own axes.
average_tip = df.groupby('日期')['小费金额'].mean()
average_tip.plot.bar(ax=plt.subplots()[1])

# Generosity: the tip as a share of (tip + total bill).
df['慷慨程度'] = df['小费金额'] / (df['小费金额'] + df['总金额'])
print(df['慷慨程度'])


def function(x, y):
    """Return the Chinese label for a gender/smoking flag combination.

    Args:
        x: Truthy selects the "男性" (male) labels, falsy the "女性" (female) ones.
        y: Truthy selects the "抽烟" (smoking) labels, falsy the "不抽烟" ones.

    Returns:
        One of "男性抽烟", "男性不抽烟", "女性抽烟" or "女性不抽烟".
    """
    # Only the truth value of each flag matters, so normalise both to bool and
    # look the label up instead of branching.
    labels = {
        (True, True): "男性抽烟",
        (True, False): '男性不抽烟',
        (False, True): '女性抽烟',
        (False, False): '女性不抽烟',
    }
    return labels[bool(x), bool(y)]


# Translate the gender / smoker strings into booleans so that `function` can
# turn each row into a combined label.
mapping = {'Female': False, 'Male': True, 'No': False, 'Yes': True}
five = df[['性别', '吸烟', '慷慨程度']].replace(mapping)
five['影响因素'] = five.apply(lambda x: function(x['性别'], x['吸烟']), axis=1)
# From here on `five` is a Series: mean generosity per combined label.
five = five.groupby('影响因素')['慷慨程度'].mean()
# Series.plot draws on the *current* axes when a figure already exists, which
# would overlay this chart on whatever was plotted before; use fresh axes.
five.plot.bar(ax=plt.subplots()[1])


# Mean total bill per value of the '时间' (time) column.
times = df.groupby('时间')['总金额'].mean()
print(times)
# Series.plot draws on the *current* axes when a figure already exists, which
# would overlay this chart on the earlier plots; give it fresh axes instead.
times.plot.bar(ax=plt.subplots()[1])
plt.show()

# The columns were renamed when the CSV was loaded, so the original
# index='tip' / values='tip' lookup raised KeyError: the tip column is
# '小费金额'. pivot_table aggregates with the mean by default.
# NOTE(review): grouping by '时间' is an assumption about the intended table
# (the original used the tip column as both index and values) — confirm.
print(pd.pivot_table(df, index='时间', values='小费金额'))
